library(pacman)
p_load(arules, arulesViz, ggplot2)
# Load the basket data as an arules `transactions` object. Items within a
# record are separated by a single space.
dataset <- read.transactions(
  file = "./AssociationRules.csv",
  sep = " "
)

# Distribution of the number of items per transaction.
summary(size(dataset))
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 1.000 7.000 10.000 9.806 12.000 25.000
# Bar chart of the ten most frequent items.
itemFrequencyPlot(dataset, topN = 10)

# Mine association rules with Apriori. Minimum support is 1 % of the
# transactions; confidence = 0 means no rule is dropped for low confidence
# at mining time.
rules <- apriori(
  data = dataset,
  parameter = list(
    support = 0.01,
    confidence = 0
  )
)
#> Apriori
#> Parameter specification:
#> Algorithmic control:
#> Absolute minimum support count: 100
#> set item appearances ...[0 item(s)] done [0.00s].
#> set transactions ...[98 item(s), 10000 transaction(s)] done [0.01s].
#> sorting and recoding items ... [89 item(s)] done [0.00s].
#> creating transaction tree ... done [0.00s].
#> checking subsets of size 1 2 3 4 5 done [0.03s].
#> writing ... [11524 rule(s)] done [0.00s].
#> creating S4 object ... done [0.01s].
# Overview of the mined rule set: number of rules, rule-length distribution
# and the spread of each quality measure.
summary(object = rules)
#> set of 11524 rules
#> rule length distribution (lhs + rhs):sizes
#> 1 2 3 4 5
#> 89 2952 7206 1272 5
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 1.00 2.00 3.00 2.84 3.00 5.00
#> summary of quality measures:
#> support confidence coverage lift count
#> Min. :0.01000 Min. :0.0108 Min. :0.01000 Min. : 0.6717 Min. : 100.0
#> 1st Qu.:0.01150 1st Qu.:0.1683 1st Qu.:0.03940 1st Qu.: 1.0076 1st Qu.: 115.0
#> Median :0.01400 Median :0.2490 Median :0.06330 Median : 1.1244 Median : 140.0
#> Mean :0.01891 Mean :0.2788 Mean :0.09342 Mean : 1.2284 Mean : 189.1
#> 3rd Qu.:0.01990 3rd Qu.:0.3596 3rd Qu.:0.11040 3rd Qu.: 1.2803 3rd Qu.: 199.0
#> Max. :0.49480 Max. :1.0000 Max. :1.00000 Max. :19.4205 Max. :4948.0
#> mining info:
# Print every mined rule together with its quality measures.
inspect(rules)

# Keep only the rules whose confidence is at least 0.5, then summarise
# that subset.
trules <- subset(rules, confidence >= 0.5)
summary(trules)
#> set of 1165 rules
#> rule length distribution (lhs + rhs):sizes
#> 2 3 4 5
#> 62 753 348 2
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 2.000 3.000 3.000 3.249 4.000 5.000
#> summary of quality measures:
#> support confidence coverage lift count
#> Min. :0.01000 Min. :0.5000 Min. :0.0100 Min. : 1.011 Min. : 100.0
#> 1st Qu.:0.01110 1st Qu.:0.5283 1st Qu.:0.0193 1st Qu.: 1.093 1st Qu.: 111.0
#> Median :0.01350 Median :0.5644 Median :0.0237 Median : 1.192 Median : 135.0
#> Mean :0.01816 Mean :0.5863 Mean :0.0321 Mean : 1.589 Mean : 181.6
#> 3rd Qu.:0.01920 3rd Qu.:0.6143 3rd Qu.:0.0338 3rd Qu.: 1.424 3rd Qu.: 192.0
#> Max. :0.18770 Max. :1.0000 Max. :0.3699 Max. :19.420 Max. :1877.0
#> mining info:
# Print the rules that passed the confidence filter.
inspect(trules)

# Ten rules drawn at random from the full rule set. No seed is set in this
# script, so the selection differs from run to run.
top_rules <- sample(rules, 10)

# Full rule set ordered from highest to lowest confidence.
rules_conf <- sort(rules, decreasing = TRUE, by = "confidence")

# Full rule set ordered from highest to lowest lift.
rules_lift <- sort(rules, decreasing = TRUE, by = "lift")

# Ten best rules under each ordering.
top_rules_conf <- head(rules_conf, n = 10)
top_rules_lift <- head(rules_lift, n = 10)
# Static scatter plots of the rule quality measures. Each view is drawn for
# the full rule set first and then for the confidence-filtered subset.

# Support against confidence, coloured by lift -- all rules.
plot(
  rules,
  method = "scatterplot",
  measure = c("support", "confidence"),
  shading = "lift",
  jitter = 0.2
)

# Support against confidence, coloured by lift -- confidence >= 0.5 only.
plot(
  trules,
  method = "scatterplot",
  measure = c("support", "confidence"),
  shading = "lift",
  jitter = 0.2
)

# Support against lift, coloured by confidence -- all rules.
plot(
  rules,
  method = "scatterplot",
  measure = c("support", "lift"),
  shading = "confidence",
  jitter = 0.2
)

# Support against lift, coloured by confidence -- confidence >= 0.5 only.
plot(
  trules,
  method = "scatterplot",
  measure = c("support", "lift"),
  shading = "confidence",
  jitter = 0.2
)
# First rows of the quality-measure table of the filtered rules.
head(quality(trules))

# Interactive (htmlwidget) scatter plot of support against confidence,
# coloured by lift.
# The original call referenced `associa_rules`, which is never defined in
# this script and fails with "object not found". `rules` (the apriori
# result) is used instead.
plot(
  rules,
  method = "scatterplot",
  measure = c("support", "confidence"),
  shading = "lift",
  engine = "htmlwidget"
)
#> plot: Too many rules supplied. Only plotting the best 1000 rules using measure lift (change parameter max if needed)
#> To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Interactive (htmlwidget) scatter plot of support against lift, coloured
# by confidence.
# The original call referenced `associa_rules`, which is never defined in
# this script and fails with "object not found". `rules` (the apriori
# result) is used instead.
plot(
  rules,
  method = "scatterplot",
  measure = c("support", "lift"),
  shading = "confidence",
  engine = "htmlwidget"
)
#> plot: Too many rules supplied. Only plotting the best 1000 rules using measure confidence (change parameter max if needed)
#> To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Parallel-coordinates plots of the ten best rules by confidence, then of
# the ten best rules by lift.
plot(top_rules_conf, method = "paracoord")
plot(top_rules_lift, method = "paracoord")

# Matrix plot of the top-confidence rules, using confidence as the measure.
plot(
  top_rules_conf,
  method = "matrix",
  measure = "confidence"
)
#> Itemsets in Antecedent (LHS)
#> [1] "{item15,item30,item49}" "{item30,item49,item56}" "{item49,item56}"
#> [4] "{item15,item49,item56}" "{item49,item56,item84}" "{item15,item49,item84}"
#> [7] "{item15,item56,item77}"
#> Itemsets in Consequent (RHS)
#> [1] "{item30}" "{item84}" "{item15}" "{item56}"
#> Matrix based plot
# Matrix plot of the ten highest-lift rules, using lift as the measure.
plot(
  top_rules_lift,
  method = "matrix",
  measure = "lift"
)
#> Itemsets in Antecedent (LHS)
#> [1] "{item15,item30,item56}" "{item30,item56,item84}" "{item15,item30,item49}"
#> [4] "{item15,item56}" "{item15,item49}" "{item30,item49,item84}"
#> [7] "{item56,item84}" "{item15,item30,item84}" "{item15,item30,item77}"
#> [10] "{item30,item77,item84}"
#> Itemsets in Consequent (RHS)
#> [1] "{item56}" "{item49}"
# Grouped-matrix plot of the ten highest-lift rules.
plot(top_rules_lift, method = "grouped")

# Interactive graph of the ten highest-confidence rules.
plot(
  top_rules_conf,
  method = "graph",
  engine = "htmlwidget"
)

# Static graph restricted to the three highest-lift rules.
top_3_lift <- head(top_rules_lift, n = 3)
plot(top_3_lift, method = "graph")
# Rules with confidence strictly above 0.8.
# The original call referenced `associa_rules`, which is never defined in
# this script and fails with "object not found". `rules` (the apriori
# result) is used instead.
hight_trustly_rules <- subset(rules, confidence > 0.8)

# The 38 most confident of those rules, highest confidence first.
first_38_conf <- head(
  sort(hight_trustly_rules, by = "confidence", decreasing = TRUE),
  n = 38
)

# Matrix plot of the selected rules on lift and confidence, with matrix
# reordering switched off via `control`.
plot(
  first_38_conf,
  method = "matrix",
  shading = c("lift", "confidence"),
  measure = c("lift", "confidence"),
  control = list(reorder = FALSE)
)
#> Itemsets in Antecedent (LHS)
#> [1] "{item15,item49,item56}" "{item49,item56,item84}" "{item49,item56}"
#> [4] "{item15,item49,item84}" "{item30,item49,item56}" "{item15,item30,item49}"
#> [7] "{item15,item56,item77}" "{item15,item56,item84}" "{item49,item77,item84}"
#> [10] "{item16,item61,item77}" "{item20,item23}" "{item16,item34,item77}"
#> [13] "{item15,item49}" "{item5,item82,item99}" "{item3,item84,item95}"
#> [16] "{item13,item82,item99}" "{item23}" "{item25,item34,item77}"
#> [19] "{item82,item99}" "{item22,item3,item41}" "{item55}"
#> [22] "{item10,item44}" "{item83}" "{item23,item5}"
#> [25] "{item30,item56,item77}" "{item15,item30,item56}" "{item10,item22,item41}"
#> [28] "{item30,item49,item84}" "{item20,item25,item41}" "{item16,item25,item77}"
#> [31] "{item30,item95,item96}"
#> Itemsets in Consequent (RHS)
#> [1] "{item30}" "{item15}" "{item56}" "{item84}" "{item5}" "{item13}" "{item10}"
#> [8] "{item34}" "{item3}" "{item77}" "{item92}"